# This file accesses the World Bank API and downloads data on outstanding WB files 
# to compare, for each country, the locations of WB projects with investments 
# as selected by the model

######### SETUP, LOAD PACKAGES #########
import json
import urllib.request
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# http://search.worldbank.org/api/v2/projects
DIR = '/Users/nicolegorton/Dropbox/World Bank/OTN_LAC/proj_code/data'

# list of countries to get projects
countries = ['Brazil', 'Chile', 'Panama', 'Nicaragua', 'Guatemala', 'Peru', 'Argentina', 'Colombia', 'Paraguay' , 'Venezuela' , 'Bolivia', 'El+Salvador', 'Mexico','Costa+Rica' , 'Ecuador', 'Uruguay']

# construct an empty dataframe that we will fill up
projects_and_locations = pd.DataFrame(columns = ['country', 'year', 'name', 'amount', 'location name', 'location country', 'location lat', 'location lon']) 

# loop through each year of data 
for y1 in range(2005,2021):
	for c in range(len(countries)):
		print(c)
		url= 'http://search.worldbank.org/api/v2/projects?format=json&mjsector_exact=Transportation&regionname_exact=Latin+America+and+Caribbean&countryshortname_exact='+countries[c]+'&source=IBRD&frmYear=' + str(y1)+ '&toYear=' + str(y1) +'&kw=N'
		print(url)

		# url='http://search.worldbank.org/api/v2/projects?format=json&mjsector_exact=Transportation&countryshortname_exact=Mexico&source=IBRD&frmYear=2010&toYear=2010'
		data = urllib.request.urlopen(url).read().decode()

		# parse json object
		obj = json.loads(data)

		with urllib.request.urlopen(url) as url:
			data = json.loads(url.read().decode())
			print(data)
			# Loop through all the projects and get the name, amount, and location 
			# get list of all project names 
			listofproj = list(obj['projects'].keys())
			country = countries[c]

			for i in range(len(listofproj)):
				name = obj['projects'][listofproj[i]]['project_name']
				amount = obj['projects'][listofproj[i]]['totalamt']
				year = obj['projects'][listofproj[i]]['approvalfy']
				
				try:
					if len(obj['projects'][listofproj[i]]['locations'])>0:
						for l in range(len(obj['projects'][listofproj[i]]['locations'])):
							locname = obj['projects'][listofproj[i]]['locations'][l]['geoLocName']
							loclat = obj['projects'][listofproj[i]]['locations'][l]['latitude']
							loclon = obj['projects'][listofproj[i]]['locations'][l]['longitude']
							loccountry = obj['projects'][listofproj[i]]['locations'][l]['country']
							projects_and_locations = projects_and_locations.append({'country' : country, 'year' : year, 
								'name' : name, 'amount' :  amount, 'location name' : locname, 
								'location country' : loccountry, 'location lat' : loclat, 'location lon' : loclon }, ignore_index = True) 
				except:
					projects_and_locations = projects_and_locations.append({'country' : country, 'year' : year, 'name' : name, 'amount' :  amount}, ignore_index = True) 

		projects_and_locations.to_csv(DIR+'/wb_api.csv')

# load in and aggregate by group 
wbapi = pd.read_csv(DIR+'/wb_api.csv')

# filter out projects where location is missing - we can't use these 
wbapi  = wbapi[wbapi['location name'].notnull()]
count_by_project = wbapi[['name', 'year', 'amount']].groupby(['name', 'year']).count()
amount_by_project = wbapi[['name', 'year', 'amount']].groupby(['name', 'year']).min()
amount_by_project['amount'] = amount_by_project['amount'].apply(lambda x: int(x.replace(',', '')))
amount_per_location = amount_by_project/count_by_project

# merge back onto original list
wbapi = pd.merge(wbapi, amount_per_location, on=['name', 'year'], how='left')

# aggregate amount by location for mapping 
amount_by_coords = wbapi[['location lat', 'location lon', 'amount_y']].groupby(['location lat', 'location lon']).sum().reset_index()
geometry = [Point(xy) for xy in zip(amount_by_coords['location lon'], amount_by_coords['location lat'])]

# export a shapefile which we will use to create the map 
crs = {'init': 'epsg:4326'}
gdf = gpd.GeoDataFrame(amount_by_coords, crs=crs, geometry = geometry)
gdf.to_file(DIR+'/wb_api_bycoords.shp')


